# Make sure to have these packages installed before running the code below
library(tidyverse); library(here); library(kableExtra); library(lme4)
library(brms); library(rptR); library(partR2); library(easystats)
library(ordinal); library(ggdist); library(tidyverse); library(ggthemes)
library(patchwork); library(tidybayes); library(visibly)Simulations for individual differences in group foraging in manta rays
1 Rationale
TODO
2 Packages Necessary
3 Simulate variation in leadership preferences
3.1 Setup and individual assignment to foraging groups
We imagine a closed population of 100 mantas observed foraging in 100 instances. For simplicity, let's say that mantas always forage in groups of 10 individuals, but the identity of each individual present in a given group shifts from observation to observation.
Code
# Seed for reproducible simulation
set.seed(42)
# Simulation constants
N_ID <- 100       # number of individuals in the closed population
N_obs <- 100      # number of foraging observations
Group_size <- 10  # individuals per foraging group
# Dataset structure: one row per individual per observation.
# NOTE: sample.int() draws a single permutation of 1:Group_size which is
# then recycled by data.frame() over all 10000 rows, so the Id -> Group
# assignment is identical in every observation. Kept as-is to preserve
# the simulated data; draw inside each Obs if assignments should vary.
dfsim <- data.frame(Obs = rep(1:N_obs, each = N_ID),
                    Id = rep(1:N_ID, N_obs),
                    # Sample integers from 1 to 10 and repeat 100 times
                    Group = rep(sample.int(n = Group_size, size = Group_size,
                                           replace = FALSE), N_obs),
                    Group_N = Group_size) %>%
  # Unique label per group within each observation
  mutate(Group_ID = paste(Group, Obs, sep = "_"))
dfsim %>% head(15) %>% kable()| Obs | Id | Group | Group_N | Group_ID |
|---|---|---|---|---|
| 1 | 1 | 1 | 10 | 1_1 |
| 1 | 2 | 5 | 10 | 5_1 |
| 1 | 3 | 10 | 10 | 10_1 |
| 1 | 4 | 8 | 10 | 8_1 |
| 1 | 5 | 2 | 10 | 2_1 |
| 1 | 6 | 4 | 10 | 4_1 |
| 1 | 7 | 6 | 10 | 6_1 |
| 1 | 8 | 9 | 10 | 9_1 |
| 1 | 9 | 7 | 10 | 7_1 |
| 1 | 10 | 3 | 10 | 3_1 |
| 1 | 11 | 1 | 10 | 1_1 |
| 1 | 12 | 5 | 10 | 5_1 |
| 1 | 13 | 10 | 10 | 10_1 |
| 1 | 14 | 8 | 10 | 8_1 |
| 1 | 15 | 2 | 10 | 2_1 |
Code
# Save data
write.csv2(dfsim, here("data/data_sim.csv"))We now have each individual assigned to a foraging group. We then need to figure out a way to assign ranks corresponding to an individual’s position within the group (1: at the front, 2: second from the front, …).
3.2 Individual preference for position in group
The simplest way to rank individuals is to give them a preference score for belonging to the front or not according to a normal distribution. Here, we assume that this preference is sampled from a normal distribution of mean 0 and a standard deviation of 1 (\(N(0, 1)\)). We can store this information in a dataframe format
We then merge the column pref into the dfsim dataframe
dfsim = merge(dfsim, ID)Based on this preference score, we assign a rank to each individual within each foraging observation and group. Because of the way the rank() function works, individuals with the most negative preference values receive the lowest rank values.
Code
| Id | Obs | Group | Group_N | Group_ID | pref | rank |
|---|---|---|---|---|---|---|
| 1 | 1 | 1 | 10 | 1_1 | 1.3709584 | 8 |
| 11 | 1 | 1 | 10 | 1_1 | 1.3048697 | 7 |
| 21 | 1 | 1 | 10 | 1_1 | -0.3066386 | 3 |
| 31 | 1 | 1 | 10 | 1_1 | 0.4554501 | 6 |
| 41 | 1 | 1 | 10 | 1_1 | 0.2059986 | 4 |
| 51 | 1 | 1 | 10 | 1_1 | 0.3219253 | 5 |
| 61 | 1 | 1 | 10 | 1_1 | -0.3672346 | 2 |
| 71 | 1 | 1 | 10 | 1_1 | -1.0431189 | 1 |
| 81 | 1 | 1 | 10 | 1_1 | 1.5127070 | 10 |
| 91 | 1 | 1 | 10 | 1_1 | 1.3921164 | 9 |
| 5 | 1 | 2 | 10 | 2_1 | 0.4042683 | 8 |
| 15 | 1 | 2 | 10 | 2_1 | -0.1333213 | 6 |
| 25 | 1 | 2 | 10 | 2_1 | 1.8951935 | 10 |
| 35 | 1 | 2 | 10 | 2_1 | 0.5049551 | 9 |
| 45 | 1 | 2 | 10 | 2_1 | -1.3682810 | 1 |
4 Data analysis
I’m comparing 3 GLMM distribution families for analyzing individual position in group: * A binomial GLMM where position is bounded by group size * A Poisson GLMM where position is not bounded by group size * A cumulative link GLMM where ranks are considered discrete ordered categories and are bounded by group size
The last model is probably the most accurate but is also the most difficult to work with! By comparing those models, my goal is to figure out if a binomial or Poisson GLMM is sufficient to recover the simulated parameter estimates and to properly predict the distribution of ranks within groups. While calculating repeatability for binomial and Poisson GLMM has been well resolved, there are no known formula for the cumulative link GLMM (that I know of!).
4.1 Binomial GLMM for the probability of leading a foraging groups
# Binary response: did the individual occupy the front position (rank 1)?
dfsim <- dfsim %>%
  mutate(lead = if_else(rank == 1, 1, 0))
# Intercept-only binomial GLMM with a random intercept per individual
glmm.lead <- glmer(lead ~ 1 + (1 | Id),
                   data = dfsim,
                   family = "binomial")
summary(glmm.lead)Generalized linear mixed model fit by maximum likelihood (Laplace
Approximation) [glmerMod]
Family: binomial ( logit )
Formula: lead ~ 1 + (1 | Id)
Data: dfsim
AIC BIC logLik deviance df.resid
39.5 54.0 -17.8 35.5 9998
Scaled residuals:
Min 1Q Median 3Q Max
-0.000053 -0.000053 -0.000053 -0.000053 0.004657
Random effects:
Groups Name Variance Std.Dev.
Id (Intercept) 14030 118.4
Number of obs: 10000, groups: Id, 100
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -19.69 2.41 -8.169 3.11e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
| Parameter | Coefficient | CI | CI_low | CI_high | z | p | Effects | Group | Std_Coefficient | Std_Coefficient_CI_low | Std_Coefficient_CI_high | Fit | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | (Intercept) | -19.6880 | 0.95 | -24.41162 | -14.96439 | -8.169119 | 0 | fixed | -19.688 | -24.41162 | -14.96439 | NA | |
| 2 | NA | 118.4474 | 0.95 | NA | NA | NA | NA | random | Id | NA | NA | NA | NA |
| 3 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
| 4 | AIC | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 39.5331704 |
| 5 | AICc | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 39.5343708 |
| 6 | BIC | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 53.9538512 |
| 7 | R2 (conditional) | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 0.9997656 |
| 8 | R2 (marginal) | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 0.0000000 |
| 11 | Sigma | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 1.0000000 |
| 12 | Log_loss | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 0.0000022 |
|
|
4.2 Binomial GLMM for position within foraging groups
# Binomial GLMM for within-group position: rank "successes" out of Group_N trials.
# FIX: glmer's two-column response is cbind(successes, failures). The original
# passed Group_N (= 10) as the failures column, which implies rank + 10 trials
# per row rather than rank out of Group_N. Using Group_N - rank as failures
# matches the brms specification `rank | trials(Group_N)` used below.
glmm.rank.bin <- glmer(cbind(rank, Group_N - rank) ~ 1 + (1 | Id),
                       family = "binomial",
                       data = dfsim)
summary(glmm.rank.bin)Generalized linear mixed model fit by maximum likelihood (Laplace
Approximation) [glmerMod]
Family: binomial ( logit )
Formula: cbind(rank, Group_N) ~ 1 + (1 | Id)
Data: dfsim
AIC BIC logLik deviance df.resid
30416.2 30430.6 -15206.1 30412.2 9998
Scaled residuals:
Min 1Q Median 3Q Max
-0.032188 -0.005681 0.002057 0.005625 0.007396
Random effects:
Groups Name Variance Std.Dev.
Id (Intercept) 0.4754 0.6895
Number of obs: 10000, groups: Id, 100
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.78954 0.06926 -11.4 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Can't calculate log-loss.
Can't calculate proper scoring rules for models without integer response values.
| Parameter | Coefficient | CI | CI_low | CI_high | z | p | Effects | Group | Std_Coefficient | Std_Coefficient_CI_low | Std_Coefficient_CI_high | Fit | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | (Intercept) | -0.7895394 | 0.95 | -0.9252816 | -0.6537971 | -11.40005 | 0 | fixed | -0.7895394 | -0.9252816 | -0.6537971 | NA | |
| 2 | NA | 0.6895083 | 0.95 | NA | NA | NA | NA | random | Id | NA | NA | NA | NA |
| 3 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
| 4 | AIC | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 3.041616e+04 |
| 5 | AICc | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 3.041616e+04 |
| 6 | BIC | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 3.043058e+04 |
| 7 | R2 (conditional) | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 1.262643e-01 |
| 8 | R2 (marginal) | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 0.000000e+00 |
| 11 | Sigma | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 1.000000e+00 |
Code
check_model(glmm.rank.bin)4.3 Poisson GLMM for position within foraging groups
# Intercept-only Poisson GLMM treating rank as an unbounded count,
# with a random intercept per individual
glmm.rank.poiss <- glmer(rank ~ 1 + (1 | Id),
                         data = dfsim,
                         family = "poisson")
summary(glmm.rank.poiss)Generalized linear mixed model fit by maximum likelihood (Laplace
Approximation) [glmerMod]
Family: poisson ( log )
Formula: rank ~ 1 + (1 | Id)
Data: dfsim
AIC BIC logLik deviance df.resid
34608.4 34622.8 -17302.2 34604.4 9998
Scaled residuals:
Min 1Q Median 3Q Max
-0.030633 -0.004974 0.001646 0.004199 0.005237
Random effects:
Groups Name Variance Std.Dev.
Id (Intercept) 0.4764 0.6902
Number of obs: 10000, groups: Id, 100
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.51255 0.06928 21.83 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
| Parameter | Coefficient | CI | CI_low | CI_high | z | df_error | p | Effects | Group | Std_Coefficient | Std_Coefficient_CI_low | Std_Coefficient_CI_high | Fit | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | (Intercept) | 1.5125501 | 0.95 | 1.376771 | 1.648329 | 21.83363 | Inf | 0 | fixed | 1.51255 | 1.376771 | 1.648329 | NA | |
| 2 | NA | 0.6902276 | 0.95 | NA | NA | NA | NA | NA | random | Id | NA | NA | NA | NA |
| 3 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
| 4 | AIC | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 3.460840e+04 |
| 5 | AICc | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 3.460840e+04 |
| 6 | BIC | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 3.462282e+04 |
| 7 | R2 (conditional) | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 7.052244e-01 |
| 8 | R2 (marginal) | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 0.000000e+00 |
| 11 | Sigma | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 1.000000e+00 |
Code
check_model(glmm.rank.poiss)4.4 Cumulative Link Mixed Model for position within foraging groups
We have two options for CLMMs in R: the ordinal package which fits CLMMs in a frequentist framework and brms which uses a Bayesian approach. I have a strong preference for working with brms given its flexibility and lots of functionality for extracting parameter values and plotting. In contrast, ordinal only handles 1 random effect within a model and several utility functions such as predict() are not implemented yet.
# Treat rank as an ordered categorical response for the CLMM
dfsim$rank.f <- as.factor(dfsim$rank)
# Frequentist cumulative link mixed model (ordinal package) with
# equally spaced thresholds between adjacent rank categories
clmm.rank <- clmm(rank.f ~ 1 + (1 | Id),
                  threshold = "equidistant",
                  data = dfsim)
clmm.rankCumulative Link Mixed Model fitted with the Laplace approximation
formula: rank.f ~ 1 + (1 | Id)
data: dfsim
link threshold nobs logLik AIC niter max.grad
logit equidistant 10000 -220.31 446.63 401(12553) 3.13e+00
Random effects:
Groups Name Variance Std.Dev.
Id (Intercept) 14874 122
Number of groups: Id 100
No Coefficients
Thresholds:
threshold.1 spacing
-68.22 26.50
| Parameter | Median | CI | CI_low | CI_high | pd | Rhat | ESS | Std_Median | Std_Median_CI_low | Std_Median_CI_high | Fit | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Intercept[1] | -194.771500 | 0.95 | -284.9987000 | -130.7574250 | 1.000 | 1.058004 | 74.93820 | -194.771500 | -284.9987000 | -130.7574250 | NA |
| 2 | Intercept[2] | -121.846500 | 0.95 | -184.0386750 | -77.2297625 | 1.000 | 1.071357 | 54.33070 | -121.846500 | -184.0386750 | -77.2297625 | NA |
| 3 | Intercept[3] | -70.389900 | 0.95 | -119.9223750 | -33.4036200 | 1.000 | 1.082688 | 46.55981 | -70.389900 | -119.9223750 | -33.4036200 | NA |
| 4 | Intercept[4] | -30.046850 | 0.95 | -69.8278100 | 0.4246816 | 0.967 | 1.075347 | 37.56906 | -30.046850 | -69.8278100 | 0.4246816 | NA |
| 5 | Intercept[5] | 0.607018 | 0.95 | -30.0850875 | 30.0031875 | 0.559 | 1.087004 | 31.68301 | 0.607018 | -30.0850875 | 30.0031875 | NA |
| 6 | Intercept[6] | 34.452750 | 0.95 | 0.4793536 | 70.2040600 | 0.981 | 1.089448 | 35.42682 | 34.452750 | 0.4793536 | 70.2040600 | NA |
| 7 | Intercept[7] | 75.545850 | 0.95 | 37.4468850 | 125.7313750 | 1.000 | 1.059517 | 51.64699 | 75.545850 | 37.4468850 | 125.7313750 | NA |
| 8 | Intercept[8] | 127.410000 | 0.95 | 87.5496025 | 193.9234500 | 1.000 | 1.058296 | 85.66555 | 127.410000 | 87.5496025 | 193.9234500 | NA |
| 9 | Intercept[9] | 201.627000 | 0.95 | 145.3978750 | 310.4558250 | 1.000 | 1.030180 | 141.10004 | 201.627000 | 145.3978750 | 310.4558250 | NA |
| 10 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
| 11 | ELPD | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | -5.5245981 |
| 13 | LOOIC | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 11.0491963 |
| 15 | WAIC | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 11.0477429 |
| 16 | R2 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 0.9999995 |
| 17 | R2 (marginal) | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 0.0000000 |
| 19 | Sigma | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 1.0000000 |
pp_check(brms.rank, ndraws = 100)5 Comparing the predictive accuracy of the models
We can fit all these models as Bayesian models and compare posterior predictive checks to compare whether one model predicts the data more accurately. We first need to define some reasonable priors and explore prior predictive checks (i.e. how the model might predict data points without having seen the data yet, taking information only from the priors).
# Candidate model formulas (brms syntax):
# - binomial: rank treated as successes out of Group_N trials
# - Poisson: rank treated as an unbounded count
# - cumulative: rank treated as ordered categories
bf.bin = bf(rank | trials(Group_N) ~ 1 + (1|Id),
family = "binomial")
bf.poiss = bf(rank ~ 1 + (1|Id),
family = "poisson")
bf.clmm = bf(rank ~ 1 + (1|Id),
family = "cumulative")
# Binomial priors: logit(0.55) ~= 0.2 for the expected mean rank of 5.5/10
priors.bin <-
# Intercepts priors
prior(normal(.2, .05), class = Intercept) +
# Random effects priors (default to exp(1))
prior(exponential(1), class = sd)
# Poisson priors: log(5.5) ~= 1.7, prior centred nearby on the log scale
priors.poiss <-
# Intercepts priors
prior(normal(1.6, .1), class = Intercept, lb = 0) +
# Random effects priors
prior(exponential(1), class = sd)
# CLMM priors: weakly informative normal on the latent-scale thresholds
priors.clmm <-
# Intercepts priors (default to exp(1))
prior(normal(0, 1), class = Intercept) +
# Random effects priors (default to exp(1))
prior(exponential(1), class = sd)Without getting too much into the details, I'm choosing priors based on the expected average position in a group of 10 individuals. With the binomial GLMM, this translates into the probability of occupying a rank between 1 and 10 in a group of 10 = 0.55. This corresponds to an intercept value of 0.2 on the logit scale. Similarly, the mean position in a group of 10 individuals is 5.5, which corresponds to an intercept of about 1.7 (log(5.5)) on the log scale.
We then apply these priors to the simulated data structure
# Prior-predictive runs: all three models share identical sampler settings,
# so wrap the shared arguments in a small helper. sample_prior = "only"
# means the posterior is ignored and draws come from the priors alone.
fit_prior_only <- function(formula, priors, file) {
  brm(formula,
      data = dfsim,
      prior = priors,
      sample_prior = "only",
      warmup = 1000,
      iter = 2000,
      seed = 42,
      cores = 8,
      threads = threading(8),
      control = list(adapt_delta = .99,
                     max_treedepth = 15),
      backend = "cmdstanr",
      file_refit = "always",
      file = file)
}
brms.bin.prior <- fit_prior_only(bf.bin, priors.bin,
                                 here("outputs/mods/brms.bin.prior"))
brms.poiss.prior <- fit_prior_only(bf.poiss, priors.poiss,
                                   here("outputs/mods/brms.poiss.prior"))
brms.clmm.prior <- fit_prior_only(bf.clmm, priors.clmm,
                                  here("outputs/mods/brms.clmm.prior"))And next plot draws from the prior distribution
# One prior-predictive density overlay per model, on a common x range
ppc_prior_plot <- function(model, title) {
  pp_check(model, ndraws = 500) +
    xlim(0, 15) +
    ggtitle(title)
}
plot.bin <- ppc_prior_plot(brms.bin.prior, "Binomial GLMM")
plot.poiss <- ppc_prior_plot(brms.poiss.prior, "Poisson GLMM")
plot.clmm <- ppc_prior_plot(brms.clmm.prior, "Cumulative Link Mixed Model")
# Stack the three panels vertically with a shared theme
plot.priorpcheck <- (plot.bin / plot.poiss / plot.clmm) +
  plot_annotation("Prior-predictive checks") &
  theme_bw(14)
plot.priorpcheckWe can already spot some potential issues! Both binomial and Poisson GLMMs predict ranks of 0, which is impossible given our data structure. The cumulative link mixed model, in contrast, seems to match the data structure correctly.
Using the same priors, we now fit these models to the simulated data
# Posterior fits: same priors and sampler settings for all three models,
# but now conditioning on the data (sample_prior left at its default).
fit_posterior <- function(formula, priors, file) {
  brm(formula,
      data = dfsim,
      prior = priors,
      warmup = 5000,
      iter = 6000,
      seed = 42,
      cores = 8,
      threads = threading(8),
      control = list(adapt_delta = .99,
                     max_treedepth = 15),
      backend = "cmdstanr",
      file_refit = "always",
      file = file)
}
brms.bin <- fit_posterior(bf.bin, priors.bin,
                          here("outputs/mods/brms.bin"))
brms.poiss <- fit_posterior(bf.poiss, priors.poiss,
                            here("outputs/mods/brms.poiss"))
brms.clmm <- fit_posterior(bf.clmm, priors.clmm,
                           here("outputs/mods/brms.clmm"))And compare predictive checks based on draws from the posterior distribution
# Posterior-predictive density overlays, restricted to the observed rank range
ppc_post_plot <- function(model, title) {
  pp_check(model, ndraws = 500) +
    xlim(0, 10) +
    ggtitle(title)
}
plot.bin <- ppc_post_plot(brms.bin, "Binomial GLMM")
plot.poiss <- ppc_post_plot(brms.poiss, "Poisson GLMM")
plot.clmm <- ppc_post_plot(brms.clmm, "Cumulative Link Mixed Model")
# Stack the three panels vertically with a shared theme
plot.ppcheck <- (plot.bin / plot.poiss / plot.clmm) +
  plot_annotation("Posterior-predictive checks") &
  theme_bw(14)
plot.ppcheckThe issues noted above persist with the simulated data. We now need to figure out how much of an issue this discrepancy might be in the context of estimating individual differences in individual position within foraging groups. There are also a number of convergence issues with the binomial and Poisson approaches that we don't encounter with the CLMM model (see R\test.brms.R file for the full report).
5.1 Estimation of among-individual variance
# Posterior draws of the among-individual SD from each fitted model
Vi.bin <- spread_draws(brms.bin, sd_Id__Intercept)
Vi.poiss <- spread_draws(brms.poiss, sd_Id__Intercept)
Vi.clmm <- spread_draws(brms.clmm, sd_Id__Intercept)
# Long format: one row per posterior draw, labelled by model
df.Vi <- data.frame(
  Vi = c(Vi.bin$sd_Id__Intercept,
         Vi.poiss$sd_Id__Intercept,
         Vi.clmm$sd_Id__Intercept),
  Mod = factor(c(rep("Binomial", nrow(Vi.bin)),
                 rep("Poisson", nrow(Vi.poiss)),
                 rep("CLMM", nrow(Vi.clmm))),
               levels = c("Binomial", "Poisson", "CLMM")))
# Half-eye densities of the among-individual variance per model
plot.Vi <- df.Vi %>%
  ggplot(aes(x = Vi, y = Mod, fill = Mod)) +
  stat_halfeye() +
  scale_fill_wsj() +
  xlab("Among-individual variance") +
  ylab("Model") +
  theme_bw(14) +
  theme(legend.position = "none")
plot.ViAs we can see, the variances have little in common between the three models. An added difficulty is that we don't have a firm grasp on what the simulated variance ought to be. Individuals were given a value sampled from a \(Normal(0, 1)\) distribution but were then ranked at each observation. We can calculate the variance of mean individual rank as a proxy for the true among-individual variance though
# Among-individual variance of mean rank.
# FIX: grouping by both Id and Obs leaves one row per group, so mean(rank)
# is just rank itself and var() returns the variance of the raw ranks
# (8.25 = the variance of a discrete uniform on 1..10), not of individual
# means. Group by Id alone to average each individual's rank across all
# observations before taking the variance.
dfsim %>%
  group_by(Id) %>%
  summarise(mean_rank = mean(rank)) %>%
  summarise(Vi = var(mean_rank))# A tibble: 1 × 1
Vi
<dbl>
1 8.25
We may also ask how accurate the random effect values for each individual are compared to our simulation.
# Compare estimated individual random effects across the three models
p1 <- plot_coefficients(brms.bin, ranef = TRUE, which_ranef = "Id")
p2 <- plot_coefficients(brms.poiss, ranef = TRUE, which_ranef = "Id")
# FIX: third panel should show the CLMM random effects
# (the original duplicated brms.poiss by copy-paste)
p3 <- plot_coefficients(brms.clmm, ranef = TRUE, which_ranef = "Id")
(p1 + p2 + p3 ) & theme_bw(14)As we can see, the random effects are expressed on very different scales, making comparisons difficult!
6 Conclusions on best models for position within group
There is a clear advantage for the CLMM model which performs much better than the binomial or Poisson GLMMs. However, estimating the repeatability for this class of model remains tricky! With non-Gaussian GLMMs, the residual variance is not a statistical parameter estimated from the model, but rather depends on the link function used and the overdispersion type used to fit the model. These issues are now well-resolved for binomial, Poisson and Gamma distribution. However, there is no guidance, to my knowledge, in how to calculate the residual variance with CLMMs. Given these limitations, it seems much more appropriate to compare variability among and within individuals using the leading/following data rather than relying on an individual’s exact position within the group. This should give qualitatively equivalent data on whether some individuals are more likely to position themselves at the front of the foraging chain and therefore seems the much wiser option!
7 Comparing repeatability and variance components between groups
I focus here in the leading/following behaviors as they are much more consistent with a binomial GLMM. For convenience, I simply allocated individuals to male or female categories at random and do not assume any differences among sexes.
As in section 1
7.1 Estimate repeatability by sex
# Repeatability of leading (binary 0/1 response) estimated separately for
# each sex, with a random intercept per individual. rptR bootstraps the
# estimate, which is slow — results are cached below.
rpt.R.f = rpt(formula = lead ~ 1 + (1|Id),
grname = "Id",
datatype = "Binary",
data = subset(dfsim, Sex == "F"))
rpt.R.m = rpt(formula = lead ~ 1 + (1|Id),
grname = "Id",
datatype = "Binary",
data = subset(dfsim, Sex == "M"))
# Cache the fitted rpt objects for reuse
saveRDS(rpt.R.f, here("outputs/mods/rpt.R.f.rds"))
saveRDS(rpt.R.m, here("outputs/mods/rpt.R.m.rds"))This code stores the values for repeatability into an rpt object. We can access the numeric values of each bootstrap from within the R_boot column and plot the distribution, or the distribution of the difference between the sexes.
For convenience, I’m calculating an ‘adjusted repeatability’, which corresponds to \(R_{adj} = \frac{V_i}{V_i + V_r}\), where \(V_i\) is the among-individual variance and \(V_r\) is the residual variance (a.k.a. the within-individual variance). In other terms, I’m ‘adjusting’ the repeatability value by leaving out the variance explained by fixed effects from the denominator. We could also consider that sexes contribute to the variation in leading or following during foraging events. In that case, we would compute the ‘unadjusted repeatability’ by including the fixed effect variance (\(V_{fe}\)) in the denominator: \(R_{unadj} = \frac{V_i}{V_i + V_{fe} + V_r}\). This is easily done by setting the adjusted argument to adjusted = FALSE within the rpt() function.
# Inspect rpt objects
summary(rpt.R.f)
Repeatability estimation using glmer method
Call = rpt(formula = lead ~ 1 + (1 | Id), grname = "Id", data = subset(dfsim, Sex == "F"), datatype = "Binary")
Data: 5000 observations
----------------------------------------
Id (50 groups)
Repeatability estimation overview:
R SE 2.5% 97.5% P_permut
Org 0.809 5.83e+02 0.259 2543 NA
Link 0.999 1.19e-03 0.997 1 NA
Bootstrapping:
N Mean Median 2.5% 97.5%
Org 1000 229.197 0.360 0.259 2543
Link 1000 0.999 0.999 0.997 1
Permutation test:
N Mean Median 2.5% 97.5% P_permut
Org 1 NA NA NA NA NA
Link 1 NA NA NA NA NA
Likelihood ratio test:
logLik full model = -8.883293
logLik red. model = -1625.415
D = 3230, df = 1, P = 0
----------------------------------------
summary(rpt.R.m)
Repeatability estimation using glmer method
Call = rpt(formula = lead ~ 1 + (1 | Id), grname = "Id", data = subset(dfsim, Sex == "M"), datatype = "Binary")
Data: 5000 observations
----------------------------------------
Id (50 groups)
Repeatability estimation overview:
R SE 2.5% 97.5% P_permut
Org 0.809 5.53e+02 0.258 2542 NA
Link 0.999 1.11e-03 0.997 1 NA
Bootstrapping:
N Mean Median 2.5% 97.5%
Org 1000 216.128 0.359 0.258 2542
Link 1000 0.999 0.999 0.997 1
Permutation test:
N Mean Median 2.5% 97.5% P_permut
Org 1 NA NA NA NA NA
Link 1 NA NA NA NA NA
Likelihood ratio test:
logLik full model = -8.883293
logLik red. model = -1625.415
D = 3230, df = 1, P = 0
----------------------------------------
7.2 Estimate among and within-individual variance by sex
To get the among and within-individual variances, we need to rerun the rpt function, this time specifying that we want to extract the variances rather than the repeatabilities. This can be done by setting the ratio argument to ratio = FALSE
# Variance components (not ratios) by sex: among-individual (Id),
# fixed-effect, and residual variances on the link scale.
# FIX: normalized datatype = c("Binary") to datatype = "Binary" for
# consistency with the sibling call (a scalar is all rpt expects).
rpt.V.f <- rpt(formula = lead ~ 1 + (1|Id),
               grname = c("Id", "Fixed", "Residual"),
               datatype = "Binary",
               data = subset(dfsim, Sex == "F"),
               ratio = FALSE)
rpt.V.m <- rpt(formula = lead ~ 1 + (1|Id),
               grname = c("Id", "Fixed", "Residual"),
               datatype = "Binary",
               data = subset(dfsim, Sex == "M"),
               ratio = FALSE)
# Cache the fitted objects (bootstrapping is slow to rerun)
saveRDS(rpt.V.f, here("outputs/mods/rpt.V.f.rds"))
saveRDS(rpt.V.m, here("outputs/mods/rpt.V.m.rds"))7.3 Combining all elements into a nice plot
# Pull the bootstrap distributions out of the rpt objects (link scale)
boot_f <- rpt.V.f$R_boot_link
boot_m <- rpt.V.m$R_boot_link
R_boot_f <- rpt.R.f$R_boot_link$Id
R_boot_m <- rpt.R.m$R_boot_link$Id
# One row per bootstrap replicate, stacked females then males
df <- data.frame(Vi = c(boot_f$Id, boot_m$Id),
                 Vfe = c(boot_f$Fixed, boot_m$Fixed),
                 VR = c(boot_f$Residual, boot_m$Residual),
                 R = c(R_boot_f, R_boot_m),
                 Sex = rep(c("F", "M"),
                           times = c(length(boot_f$Id),
                                     length(boot_m$Id))))
# Pairwise bootstrap differences (female minus male) as effect sizes
df.2 <- data.frame(delta_Vi = boot_f$Id - boot_m$Id,
                   delta_Vfe = boot_f$Fixed - boot_m$Fixed,
                   delta_VR = boot_f$Residual - boot_m$Residual,
                   delta_R = R_boot_f - R_boot_m)
# Among-individual variance by sex, paired with the F-M bootstrap difference
p1 <- df %>%
  ggplot(aes(x = Vi, fill = Sex)) +
  stat_halfeye(alpha = .6) +
  scale_fill_wsj() +
  labs(x = bquote("Among-individual variance ("*V[i]*")"),
       y = "Density") +
  theme_bw(14)
delta.p1 <- df.2 %>%
  ggplot(aes(x = delta_Vi)) +
  stat_halfeye(alpha = .6) +
  labs(x = bquote(Delta[V[i]]),
       y = "Density") +
  theme_bw(14)
p1 <- p1 + delta.p1
# Fixed-effect variance by sex, paired with the F-M bootstrap difference
p2 <- df %>%
  ggplot(aes(x = Vfe, fill = Sex)) +
  stat_halfeye(alpha = .6) +
  scale_fill_wsj() +
  labs(x = bquote("Fixed effect variance ("*V[fe]*")"),
       y = "Density") +
  theme_bw(14)
delta.p2 <- df.2 %>%
  ggplot(aes(x = delta_Vfe)) +
  stat_halfeye(alpha = .6) +
  labs(x = bquote(Delta[V[fe]]),
       y = "Density") +
  theme_bw(14)
p2 <- p2 + delta.p2
# Residual variance by sex, paired with the F-M bootstrap difference.
# FIX: the original plotted the Vfe column again (copy-paste) even though
# the axis label says "Residual variance" — plot VR instead.
p3 <- df %>%
  ggplot(aes(x = VR, fill = Sex)) +
  stat_halfeye(alpha = .6) +
  scale_fill_wsj() +
  xlab(bquote("Residual variance ("*V[R]*")")) +
  ylab("Density") +
  theme_bw(14)
# FIX: the delta panel plots delta_VR, so label it Delta[V[R]],
# not Delta[V[fe]] as in the original.
delta.p3 <- df.2 %>%
  ggplot(aes(x = delta_VR)) +
  stat_halfeye(alpha = .6) +
  xlab(bquote(Delta[V[R]])) +
  ylab("Density") +
  theme_bw(14)
p3 <- p3 + delta.p3
# Repeatability by sex (bounded on [0, 1]), paired with the F-M difference
p4 <- df %>%
  ggplot(aes(x = R, fill = Sex)) +
  stat_halfeye(alpha = .6) +
  scale_fill_wsj() +
  xlim(0, 1) +
  labs(x = bquote("Repeatability (R)"),
       y = "Density") +
  theme_bw(14)
delta.p4 <- df.2 %>%
  ggplot(aes(x = delta_R)) +
  stat_halfeye(alpha = .6) +
  xlim(0, 1) +
  labs(x = bquote(Delta[R]),
       y = "Density") +
  theme_bw(14)
p4 <- p4 + delta.p4
# Assemble the four variance/repeatability rows vertically
plot_var_R <- p1 / p2 / p3 / p4
plot_var_RThe output looks a little janky, but that’s not surprising given that we’ve simulated data with no fixed effects. In our case, the only source of variation comes from the among-individual preferences for being at the front or the back of the group. Hence, the variation explained by fixed effects and residuals reduces to 0 and repeatability equals 1 in our case.